package com.tifa.mark.utils;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

/**
 * Static helpers that extract the plain text of Microsoft Word documents using Apache POI.
 *
 * <p>Both overloads are best-effort: any failure while opening or parsing a document is printed
 * to standard error and an empty string is returned instead of propagating the exception.
 */
public class ReadWord {
  /**
   * Extracts the text of a Word document supplied as a stream.
   *
   * <p>The format is chosen from the end of {@code name}, ignoring case: a name ending in
   * {@code doc} is read as a binary Word document, one ending in {@code docx} as an OOXML
   * document. The suffix is deliberately matched without a leading dot, as it always was, so a
   * bare extension such as {@code "docx"} keeps working.
   *
   * <p>This method does not explicitly close {@code inputStream}; it only closes the POI objects
   * it creates on top of it.
   *
   * @param inputStream stream positioned at the start of the document
   * @param name file name (or extension) used to pick the parser; {@code null} or an
   *     unrecognised suffix yields an empty string
   * @return the extracted text, or an empty string if the format is not recognised or the
   *     document could not be read
   */
  public static String getWordText(InputStream inputStream, String name) {
    String text = StringUtils.EMPTY;
    try {
      if (StringUtils.endsWithIgnoreCase(name, "doc")) {
        // try-with-resources releases the extractor even when getText() throws.
        try (WordExtractor extractor = new WordExtractor(inputStream)) {
          text = extractor.getText();
        }
      } else if (StringUtils.endsWithIgnoreCase(name, "docx")) {
        // The package is a resource of its own so it is released even if the extractor
        // constructor fails; resources close in reverse order (extractor first, then package).
        try (OPCPackage opcPackage = OPCPackage.open(inputStream);
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage)) {
          text = extractor.getText();
        }
      }
    } catch (Exception e) {
      // Best-effort contract: report the failure and fall through with whatever text we have.
      e.printStackTrace();
    }
    return text;
  }

  /**
   * Extracts the text of the Word document stored at {@code path}.
   *
   * <p>The path is URL-decoded as UTF-8 first. A decoded path ending in {@code .doc} is read as a
   * binary Word document and one ending in {@code .docx} as an OOXML document (case-insensitive).
   *
   * @param path URL-encoded file system path of the document
   * @return the extracted text, or an empty string if the extension is not recognised or the
   *     document could not be read
   * @throws RuntimeException if the UTF-8 encoding is reported as unsupported while decoding
   * @deprecated presumably superseded by {@link #getWordText(InputStream, String)} - confirm with
   *     the callers before removing.
   */
  @Deprecated
  public static String getWordText(String path) {
    try {
      path = URLDecoder.decode(path, "UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
    String text = "";
    if (StringUtils.endsWithIgnoreCase(path, ".doc")) {
      // Both the file stream and the extractor are released on every exit path.
      try (InputStream is = new FileInputStream(path);
          WordExtractor extractor = new WordExtractor(is)) {
        text = extractor.getText();
      } catch (Exception e) {
        e.printStackTrace();
      }
    } else if (StringUtils.endsWithIgnoreCase(path, ".docx")) {
      // Same close order as before (extractor, then package), now also applied on failure.
      try (OPCPackage opcPackage = POIXMLDocument.openPackage(path);
          POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage)) {
        text = extractor.getText();
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    return text;
  }

}
